package org.jeecg.modules.house;

import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;

import java.util.concurrent.atomic.AtomicInteger;

/**
 * Sample WebMagic {@link PageProcessor} that fetches
 * {@code https://newhouse.0557fdc.com/} and prints, for every element with
 * class {@code nav1} found inside an element with class {@code beian_se2_1},
 * the text of its {@code <a>} tags followed by a 1-based running number.
 *
 * <p>Results are written to standard output only; nothing is stored in the
 * {@link Page} result fields and no follow-up URLs are queued.
 *
 * @author zhongyin
 * @date 2021-02-28 21:26
 */
public class TestHouse implements PageProcessor {

    /** Crawl settings: retry count 3, sleep time 100 (see WebMagic {@code Site} docs for units). */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /**
     * Walks the downloaded page and prints each navigation entry.
     *
     * <p>For each {@code nav1} element two lines are printed: the combined
     * text of its anchors, then the entry's sequence number (starting at 1
     * and counting across all {@code beian_se2_1} containers on the page).
     *
     * @param page the page handed over by the WebMagic spider
     */
    @Override
    public void process(Page page) {
        Html html = page.getHtml();
        Document document = html.getDocument();
        Elements sections = document.getElementsByClass("beian_se2_1");

        // A plain int is enough here: the counter is only touched by this
        // method call, so no atomic wrapper is needed.
        int entryNumber = 1;
        for (int i = 0; i < sections.size(); i++) {
            Elements navItems = sections.get(i).getElementsByClass("nav1");
            for (int j = 0; j < navItems.size(); j++) {
                Elements anchors = navItems.get(j).getElementsByTag("a");
                System.out.println(anchors.text());
                System.out.println(entryNumber);
                entryNumber++;
            }
        }

        // The official WebMagic documentation has many more examples worth
        // studying, e.g. proxy configuration, built-in URL de-duplication and
        // page de-duplication.
        // Official documentation: http://webmagic.io/docs/zh/
    }

    /**
     * Returns the crawl settings used by the spider for this processor.
     *
     * @return the {@link Site} configured for this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Starts a single-threaded spider on the start URL and runs it in the
     * calling thread.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Spider.create(new TestHouse())
                .addUrl("https://newhouse.0557fdc.com/")
                .thread(1)
                .run();
    }
}
